{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# brand_name = 'ONLY'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "init_url = 'https://ONLY.tmall.com/category.htm'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "tm_headers = {\n",
    "    'accept': \"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\",\n",
    "    'accept-encoding': \"gzip, deflate, br\",\n",
    "    'accept-language': \"zh-CN,zh;q=0.9\",\n",
    "    'referer': \"https://list.tmall.com/search_product.htm?type=p&q=%B0%AE%BE%D3%CD%C3\",\n",
    "    'upgrade-insecure-requests': \"1\",\n",
    "    'user-agent': \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\",\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tm_headers['cookie'] = "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "list index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-6-960136114340>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msess\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minit_url\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtm_headers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0msoup\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mBeautifulSoup\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'html5lib'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[0mall_item\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msoup\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'ul.i_menu1.mt_9 > li.all_nav111 > a[href]'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m     \u001b[0mall_item_url\u001b[0m  \u001b[1;33m=\u001b[0m \u001b[1;34m'https:'\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mall_item\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mattrs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'href'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m     \u001b[0mnew_params\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mall_item_url\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'?'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mIndexError\u001b[0m: list index out of range"
     ]
    }
   ],
   "source": [
    "with requests.Session() as sess:\n",
    "    res = sess.get(init_url, headers=tm_headers)\n",
    "    soup = BeautifulSoup(res.text, 'html5lib')\n",
    "    all_item = soup.select('ul.i_menu1.mt_9 > li.all_nav111 > a[href]')[0]\n",
    "    all_item_url  = 'https:' + all_item .attrs['href']\n",
    "    new_params = all_item_url.split('?')[-1]\n",
    "    datenum = str(round(datetime.timestamp(datetime.now())*1000))\n",
    "    class_url = start_url + new_params + '&pageNo:1'\n",
    "    ress = sess.get(class_url,headers=tm_headers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://eichitoo.tmall.com/i/asynSearch.htm?_ksTS=1543500548774_116&callback=jsonp117&mid=w-15347103346-0&wid=15347103346&spm=a1z10.3-b-s.w4011-15347103346.107.66f4b8ffgOKYwb&&search=y&orderType=hotsell_desc&scene=taobao_shop&pageNo:1'"
      ]
     },
     "execution_count": 302,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ress.url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "payload = {\n",
    "    \n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {},
   "outputs": [],
   "source": [
    "page_text = ress.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "metadata": {},
   "outputs": [],
   "source": [
    "page_text = page_text.replace('\\\\', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "page_content = BeautifulSoup(page_text, 'html5lib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "ename": "IndexError",
     "evalue": "list index out of range",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-306-aad58fa75e71>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpage_content\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'b.ui-page-s-len'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;31mIndexError\u001b[0m: list index out of range"
     ]
    }
   ],
   "source": [
    "page_content.select('b.ui-page-s-len')[0].text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "for one_item in page_content.select('div.J_TItems > div.item5line1 > dl')[:10]:\n",
    "    print(one_item.select('dd.detail > a[href]')[0].attrs['href'])\n",
    "    print(one_item.select('dd.detail > div.attribute > div.cprice-area')[0].text)\n",
    "    print(one_item.select('dd.detail > div.attribute > div.coupon-area')[0].text)\n",
    "    print(one_item.select('dd.detail > div.attribute > div.sale-area')[0].text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('d:\\\\test\\\\ejutu.html', 'w') as f:\n",
    "    f.write(ress.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
